############################## # DATA MANIPULATION EXAMPLES # ############################## #Read in the data and call the new object pedigree1 pedigree1 <- read.table(file="pedigree2.ped") #Name the columns #Note there are two columns per snp names(pedigree1) <- c("pedid", "indid", "fid", "mid", "sex", "status", "a11", "a12", "a21", "a22", "a31", "a32", "a41", "a42", "a51", "a52", "a61", "a62", "a71", "a72", "a81", "a82", "a91", "a92", "a101", "a102") #Make the names of the columns available so that you can refer to the columns #Using their name instead of the column numbers attach(pedigree1) #Select the parents parents1 <- pedigree1[fid==0,] #Select the kids kids1 <- pedigree1[fid>0,] #Check whether we did not loose individuals nrow(parents1)+nrow(kids)==nrow(pedigree1) #Look at the parents, which are supposed to be selected at random from the population #to derive some population based quantities #For instance, what is the distribution of the sexes? (it should be 50/50 since we have #generated mom and dad for every family. detach(pedigree1) attach(parents1) table(sex) #gives you a table of counts nrow(parents1) # there are 200 (mom,dad) couples prop.table(table(sex)) # gives you the corresponding table of proportions frequenties <- function(x) { prop.table(table(x)) } apply(parents1,2,frequenties) #to compute the column frequencies; note that these are not the allele frequencies #Let us convert the allele data into genotype data #this time using the entire pedigree file again detach(parents1) attach(pedigree1) g1 <- genotype(cbind(a11,a12)) #snp1 only summary(g1) #gives you genotype and allele frequencies #check LD between two markers g1 <- genotype(cbind(a11,a12)) #snp1 as genotype g2 <- genotype(cbind(a21,a22)) #snp2 as genotype twosnps <- makeGenotypes(data.frame(g1,g2)) LD(twosnps) #check LD between three markers g1 <- genotype(cbind(a11,a12)) #snp1 as genotype g2 <- genotype(cbind(a21,a22)) #snp2 as genotype g3 <- genotype(cbind(a31,a32)) #snp2 as genotype threesnps <- makeGenotypes(data.frame(g1,g2,g3)) LD(threesnps) LDtable(LD(threesnps)) #check HWE using the parents detach(pedigree1) attach(parents1) g1 <- genotype(cbind(a11,a12)) HWE.chisq(g1) #note that you can also perform an exact test, etc #how many kids are there on average in a family detach(parents1) attach(kids1) mean(table(pedid))